
* Work from the project folder so that the relative paths used below resolve.
cd "/Users/kevin/Dropbox/TZ ITNs"

* Read the district malaria workbook; the first spreadsheet row supplies the
* variable names, and any data currently in memory is discarded.
import excel using "district malaria data.xls", sheet("Sheet1") firstrow clear

/*
This do-file enters and cleans district-level malaria prevalence values from Chaki et al. (pp. 124-5)
(see the bibliography for the full reference and the README file for an explanation of the data cleaning procedures).
 */
 
* Discard the first imported observation, then put the districts in
* alphabetical order; later steps copy values from neighbouring rows
* and therefore depend on this ordering.
drop in 1
sort DISTNAME

* Remove the region column and give the 2000 prevalence measure a short name.
drop region
rename pfpr2_102000 malaria_2000

* Record each row's position in the sorted file at this point.
generate index = _n

*******
* Harmonise district names across the datasets: remove a duplicate entry
* and replace alternate spellings.
* The rows are deliberately NOT re-sorted here, so each district keeps the
* position it was given by the earlier sort.
*******

* duplicate entry
drop if DISTNAME == "Dodoma urban"

* alternate spelling
replace DISTNAME = "Misungwi"     if DISTNAME == "Missungwi"

* there is no "Arusha" in the NPS data, only "Arusha Urban"
replace DISTNAME = "Arusha Urban" if DISTNAME == "Arusha"

* remaining spelling / abbreviation differences
replace DISTNAME = "Kaskazini A"  if DISTNAME == "Kaskazini 'A'"
replace DISTNAME = "Kaskazini B"  if DISTNAME == "Kaskazini 'B'"
replace DISTNAME = "Chake Chake"  if DISTNAME == "Chake"
replace DISTNAME = "Mbeya Rural"  if DISTNAME == "Mbeya (R)"

*****
* Merge rural and urban pairs.
* For each listed district, every malaria variable is overwritten with the
* value held by the observation immediately after it in the current row
* order (presumably the urban counterpart -- this relies on the earlier
* alphabetical sort and should be confirmed against the data).
* The eleven names are kept in two lists because inlist() only accepts a
* limited number of string arguments.
*****

foreach malvar of varlist malariafree2000-pfpr2_102012 {

    // first group of districts
    replace `malvar' = `malvar'[_n+1] if inlist(DISTNAME,              ///
        "Bukoba Rural", "Dodoma Rural", "Iringa Rural",                ///
        "Lindi Rural", "Musoma Rural")

    // second group of districts
    replace `malvar' = `malvar'[_n+1] if inlist(DISTNAME,              ///
        "Mbeya Rural", "Morogoro", "Mtwara Rural",                     ///
        "Sumbawanga Rural", "Singida Rural", "Shinyanga Rural" )
}
  
/*
This represents 11 of the 14 urban councils which have to be added to the malaria data file.
(Not Kigoma, Songea and Moshi: Moshi is addressed below, and there are no observations
from Kigoma or Songea Urban in the NPS sample,
so there is no need to impute a value for these districts.)
Next we have cases where the NPS data only has a district name with either "urban" or "rural" appended to it,
but Chaki et al. have the district name without that urban/rural classifier. We change the name in
the malaria data so that it merges with NPS.
For example:
Meru appears to be the urban council for Arumeru, see http://merudc.blogspot.com:  "Meru district council is among the two councils that form Arumeru district"

Because the malaria file does not have observations for some councils
(such as those newly created by splits from old districts), we create them by adding an observation
to the malaria dataset and setting its district name to the desired name.
For 3 of these, NPS has both Kasulu/Masasi/Korogwe and Kasulu/Masasi/Korogwe Township Authority;
Chaki et al. only has districts named Kasulu/Masasi/Korogwe.
So I create a township authority district and assign it the malaria value of the similarly named district.
*/

**********
* Reconcile district naming conventions so the malaria file merges with NPS.
* Some renames below can produce a name that is already present in the file;
* repeated names are removed by the duplicates drop near the end of this file.
**********

* Musoma: the urban row becomes "Musoma Municipal", the rural row plain "Musoma".
replace DISTNAME = "Musoma Municipal" if DISTNAME == "Musoma Urban"
replace DISTNAME = "Musoma"           if DISTNAME == "Musoma Rural"

* NPS has just "Moshi" (no urban or rural designation), so the rural row is
* renamed to "Moshi".
* NOTE(review): the original comment describes Moshi as a district whose
* urban/rural values were merged, but "Moshi Rural" is not in the lists of the
* rural/urban merge loops earlier in this file -- confirm this is intended.
replace DISTNAME = "Moshi"            if DISTNAME == "Moshi Rural"

* There is no "Bukoba Rural" in NPS, and Bukoba is one of the districts whose
* urban and rural values are merged, so the rural row is renamed to urban.
replace DISTNAME = "Bukoba Urban"     if DISTNAME == "Bukoba Rural"

* Only "Kibaha Urban" exists in NPS and only "Kibaha" in Chaki et al.
replace DISTNAME = "Kibaha Urban"     if DISTNAME == "Kibaha"

* NPS has only "Singida Urban"; Chaki et al. have both urban and rural, and
* Singida is a merged pair, so the rural row is renamed to urban.
replace DISTNAME = "Singida Urban"    if DISTNAME == "Singida Rural"

* There is no Bahi district in Chaki et al. and no "Dodoma Rural" in NPS.
* Bahi is located directly next to Dodoma Urban, so it is treated like an
* urban/rural pair and given Dodoma's values.
replace DISTNAME = "Bahi"             if DISTNAME == "Dodoma Rural"

* Chaki et al. have only "Mpanda"; NPS has both Mpanda Urban and Mpanda Rural.
replace DISTNAME = "Mpanda Urban"     if DISTNAME == "Mpanda"

* Chaki et al. have only "Njombe"; NPS has only "Njombe Urban".
replace DISTNAME = "Njombe Urban"     if DISTNAME == "Njombe"

* Meru appears to be the urban council for Arumeru, see http://merudc.blogspot.com:
* "Meru district council is among the two councils that form Arumeru district"
replace DISTNAME = "Meru"             if DISTNAME == "Arumeru"

* Append one empty observation for each council that NPS lists but Chaki et al.
* do not, and give it that council's name. The malaria values of these rows
* are filled in further down, after the file has been re-sorted.
* (moreobs is a user-written command and must be installed.)
foreach council in                                                          ///
    "Handeni Township Authority"    /// NPS has both Handeni and Handeni Township Authority
    "Babati Urban"                  /// NPS has Babati and Babati Urban; Chaki et al. only Babati
    "Mpanda Rural"                  /// NPS has Mpanda Rural and Mpanda Urban; Chaki et al. only Mpanda
    "Kasulu Township Authority"     /// NPS has both Kasulu and Kasulu Township Authority
    "Masasi Township Authority"     /// NPS has both Masasi and Masasi Township Authority
    "Korogwe Township Authority"    /// NPS has both Korogwe and Korogwe Township Authority
    {
    moreobs 1
    // the freshly added row is the one whose name is still blank
    replace DISTNAME = "`council'" if DISTNAME == ""
}

************
*Address district splits
/*
Some districts in the Chaki et al. list are known to have been split into
new districts since 2010. For each such split we add a placeholder
observation named "<parent> split". Further down, the placeholder is given
the malaria values of a neighbouring row in sorted order (intended to be its
parent district) and is then renamed to the new district's real name.
*/
*************

foreach placeholder in                                                      ///
    "Mbinga split"        /// Nyasa was created out of Mbinga
    "Kasulu split"        /// Buhigwe was created out of Kasulu
    "Kigoma split"        /// Uvinza was created out of Kigoma Rural
    "Bihamarulo split"    /// Chato was created out of Biharamulo
    "Musoma split"        /// Butiama was created out of Musoma
    "Njombe split"        /// Wanging'ombe was created out of Njombe
    "Bariadi split"       /// Itilima was created out of Bariadi
    "Singida split"       /// Ikungi was created out of Singida
    "Mpanda split"        /// Mlele was created out of Mpanda
    "Urambo split"        /// Kaliua was created out of Urambo
    "Karagwe split"       /// Kyerwa was created out of Karagwe
    "Kondoa split"        /// Chemba was created out of Kondoa
    "Masasi split"        /// Nanyumbu was created out of Masasi
    "Hai split"           /// Siha was created out of Hai
    "Bahi split"          /// later relabelled Chamwino
    {
    moreobs 1
    // the freshly added row is the one whose name is still blank
    replace DISTNAME = "`placeholder'" if DISTNAME == ""
}

* Re-sort so that every added row sits next to the row it borrows from.
* All copies below are positional (previous or next observation), so they
* depend entirely on this alphabetical ordering.
sort DISTNAME

foreach malvar of varlist malariafree2000-pfpr2_102012 {

    // 1. Added township/urban councils take the values of the row just above
    //    them. This must run before the "split" copies in steps 3 and 4.
    replace `malvar' = `malvar'[_n-1] if inlist(DISTNAME,                  ///
        "Handeni Township Authority", "Babati Urban",                      ///
        "Kasulu Township Authority", "Korogwe Township Authority",         ///
        "Masasi Township Authority")

    // 2. Mpanda Rural takes the values of the row just below it.
    replace `malvar' = `malvar'[_n+1] if DISTNAME == "Mpanda Rural"

    // 3. First batch of split placeholders: copy from the row just above.
    //    NOTE(review): which row precedes each placeholder depends on the
    //    names present in the data (e.g. for "Kigoma split") -- confirm it
    //    is the intended parent district.
    replace `malvar' = `malvar'[_n-1] if inlist(DISTNAME,                  ///
        "Mbinga split", "Kasulu split", "Kigoma split",                    ///
        "Musoma split", "Njombe split", "Bariadi split")

    // 4. Second batch of split placeholders: copy from the row just above.
    replace `malvar' = `malvar'[_n-1] if inlist(DISTNAME,                  ///
        "Singida split", "Mpanda split", "Urambo split", "Karagwe split",  ///
        "Kondoa split", "Bahi split", "Masasi split", "Hai split")

    // 5. This placeholder copies from the row just below instead.
    replace `malvar' = `malvar'[_n+1] if DISTNAME == "Bihamarulo split"
}
  
*********
* Give every split placeholder the name of the newly created district.
* The source for each new district name is noted at the end of its line.
*********

replace DISTNAME = "Nyasa"        if DISTNAME == "Mbinga split"      // http://www.statoids.com/ytz.html
replace DISTNAME = "Buhigwe"      if DISTNAME == "Kasulu split"      // http://www.statoids.com/ytz.html
replace DISTNAME = "Uvinza"       if DISTNAME == "Kigoma split"      // http://www.statoids.com/ytz.html
replace DISTNAME = "Chato"        if DISTNAME == "Bihamarulo split"  // https://en.wikipedia.org/wiki/Biharamulo_District
replace DISTNAME = "Butiama"      if DISTNAME == "Musoma split"      // http://www.statoids.com/ytz.html
replace DISTNAME = "Wanging'ombe" if DISTNAME == "Njombe split"      // http://www.statoids.com/ytz.html
replace DISTNAME = "Itilima"      if DISTNAME == "Bariadi split"     // http://www.statoids.com/ytz.html
replace DISTNAME = "Ikungi"       if DISTNAME == "Singida split"     // http://www.statoids.com/ytz.html
replace DISTNAME = "Mlele"        if DISTNAME == "Mpanda split"      // http://www.statoids.com/ytz.html
replace DISTNAME = "Kaliua"       if DISTNAME == "Urambo split"      // http://www.statoids.com/ytz.html
replace DISTNAME = "Kyerwa"       if DISTNAME == "Karagwe split"     // http://www.statoids.com/ytz.html
replace DISTNAME = "Chemba"       if DISTNAME == "Kondoa split"      // http://www.statoids.com/ytz.html
replace DISTNAME = "Nanyumbu"     if DISTNAME == "Masasi split"      // http://www2.jica.go.jp/en/evaluation/pdf/2014_0961010_4.pdf, footnote 35
replace DISTNAME = "Siha"         if DISTNAME == "Hai split"         // Siha council facebook page
replace DISTNAME = "Chamwino"     if DISTNAME == "Bahi split"        // http://pdf.usaid.gov/pdf_docs/PA00KPVM.pdf

* Keep a single row per district name. The renames earlier in this file can
* leave the same name on more than one row; the first occurrence is kept.
duplicates drop DISTNAME, force

*calculate the median district malaria prevalence
sum malaria_2000, detail
egen median_prevalence = median(malaria_2000)

* Flag districts at or above the median prevalence.
* Stata treats a missing numeric value as larger than any number, so an
* unguarded ">=" comparison would flag every district with missing
* malaria_2000 as above the median. The if-condition leaves the flag missing
* for those districts; districts with a value are coded 0/1 as before.
gen above_median = malaria_2000 >= median_prevalence if !missing(malaria_2000)

save "constructed data/districts_malaria", replace
 
